{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'dict'>\n",
      "bathrooms\n",
      "<class 'dict'>\n",
      "['4', '6', '9', '10', '15', '16', '18', '19', '23', '32']\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "import pandas as pd \n",
    "import numpy as np\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "\n",
    "# Parse the raw listings. The context manager closes the file handle even if parsing fails.\n",
    "# NOTE: rebinding `json` shadows the module, but the cell that fills df2 reads the parsed data\n",
    "# through this name, so it is kept; the `import json` above restores the module on re-run.\n",
    "with open('RentListingInquries_train.json', 'r', encoding='utf-8') as file:\n",
    "    json = json.load(file)\n",
    "print(type(json))\n",
    "\n",
    "# Collect the record keys of the first column only; they are used as the 'intent' column.\n",
    "intent = []\n",
    "for key in json:\n",
    "    print(key)\n",
    "    print(type(json[key]))\n",
    "    intent.extend(json[key])\n",
    "    break  # the keys of the first column are all that is needed here\n",
    "print(intent[:10])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'float'>\n"
     ]
    }
   ],
   "source": [
    "# Scratch check: print the type of a float literal.\n",
    "print(type(1.1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>intent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  intent\n",
       "0      4\n",
       "1      6\n",
       "2      9\n",
       "3     10\n",
       "4     15"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Start the frame with a single 'intent' column holding the record keys gathered earlier.\n",
    "df2 = pd.DataFrame(intent, columns=['intent'])\n",
    "df2.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>intent</th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>building_id</th>\n",
       "      <th>created</th>\n",
       "      <th>description</th>\n",
       "      <th>display_address</th>\n",
       "      <th>features</th>\n",
       "      <th>latitude</th>\n",
       "      <th>listing_id</th>\n",
       "      <th>longitude</th>\n",
       "      <th>manager_id</th>\n",
       "      <th>photos</th>\n",
       "      <th>price</th>\n",
       "      <th>street_address</th>\n",
       "      <th>interest_level</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>8579a0b0d54db803821a35a4a615e97a</td>\n",
       "      <td>2016-06-16 05:55:27</td>\n",
       "      <td>Spacious 1 Bedroom 1 Bathroom in Williamsburg!...</td>\n",
       "      <td>145 Borinquen Place</td>\n",
       "      <td>[Dining Room, Pre-War, Laundry in Building, Di...</td>\n",
       "      <td>40.7108</td>\n",
       "      <td>7170325.0</td>\n",
       "      <td>-73.9539</td>\n",
       "      <td>a10db4590843d78c784171a107bdacb4</td>\n",
       "      <td>[https://photos.renthop.com/2/7170325_3bb5ac84...</td>\n",
       "      <td>2400.0</td>\n",
       "      <td>145 Borinquen Place</td>\n",
       "      <td>medium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>b8e75fc949a6cd8225b455648a951712</td>\n",
       "      <td>2016-06-01 05:44:33</td>\n",
       "      <td>BRAND NEW GUT RENOVATED TRUE 2 BEDROOMFind you...</td>\n",
       "      <td>East 44th</td>\n",
       "      <td>[Doorman, Elevator, Laundry in Building, Dishw...</td>\n",
       "      <td>40.7513</td>\n",
       "      <td>7092344.0</td>\n",
       "      <td>-73.9722</td>\n",
       "      <td>955db33477af4f40004820b4aed804a0</td>\n",
       "      <td>[https://photos.renthop.com/2/7092344_7663c19a...</td>\n",
       "      <td>3800.0</td>\n",
       "      <td>230 East 44th</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>cd759a988b8f23924b5a2058d5ab2b49</td>\n",
       "      <td>2016-06-14 15:19:59</td>\n",
       "      <td>**FLEX 2 BEDROOM WITH FULL PRESSURIZED WALL**L...</td>\n",
       "      <td>East 56th Street</td>\n",
       "      <td>[Doorman, Elevator, Laundry in Building, Laund...</td>\n",
       "      <td>40.7575</td>\n",
       "      <td>7158677.0</td>\n",
       "      <td>-73.9625</td>\n",
       "      <td>c8b10a317b766204f08e613cef4ce7a0</td>\n",
       "      <td>[https://photos.renthop.com/2/7158677_c897a134...</td>\n",
       "      <td>3495.0</td>\n",
       "      <td>405 East 56th Street</td>\n",
       "      <td>medium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10</td>\n",
       "      <td>1.5</td>\n",
       "      <td>3.0</td>\n",
       "      <td>53a5b119ba8f7b61d4e010512e0dfc85</td>\n",
       "      <td>2016-06-24 07:54:24</td>\n",
       "      <td>A Brand New 3 Bedroom 1.5 bath ApartmentEnjoy ...</td>\n",
       "      <td>Metropolitan Avenue</td>\n",
       "      <td>[]</td>\n",
       "      <td>40.7145</td>\n",
       "      <td>7211212.0</td>\n",
       "      <td>-73.9425</td>\n",
       "      <td>5ba989232d0489da1b5f2c45f6688adc</td>\n",
       "      <td>[https://photos.renthop.com/2/7211212_1ed4542e...</td>\n",
       "      <td>3000.0</td>\n",
       "      <td>792 Metropolitan Avenue</td>\n",
       "      <td>medium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>15</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>bfb9405149bfff42a92980b594c28234</td>\n",
       "      <td>2016-06-28 03:50:23</td>\n",
       "      <td>Over-sized Studio w abundant closets. Availabl...</td>\n",
       "      <td>East 34th Street</td>\n",
       "      <td>[Doorman, Elevator, Fitness Center, Laundry in...</td>\n",
       "      <td>40.7439</td>\n",
       "      <td>7225292.0</td>\n",
       "      <td>-73.9743</td>\n",
       "      <td>2c3b41f588fbb5234d8a1e885a436cfa</td>\n",
       "      <td>[https://photos.renthop.com/2/7225292_901f1984...</td>\n",
       "      <td>2795.0</td>\n",
       "      <td>340 East 34th Street</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  intent  bathrooms  bedrooms                       building_id  \\\n",
       "0      4        1.0       1.0  8579a0b0d54db803821a35a4a615e97a   \n",
       "1      6        1.0       2.0  b8e75fc949a6cd8225b455648a951712   \n",
       "2      9        1.0       2.0  cd759a988b8f23924b5a2058d5ab2b49   \n",
       "3     10        1.5       3.0  53a5b119ba8f7b61d4e010512e0dfc85   \n",
       "4     15        1.0       0.0  bfb9405149bfff42a92980b594c28234   \n",
       "\n",
       "               created                                        description  \\\n",
       "0  2016-06-16 05:55:27  Spacious 1 Bedroom 1 Bathroom in Williamsburg!...   \n",
       "1  2016-06-01 05:44:33  BRAND NEW GUT RENOVATED TRUE 2 BEDROOMFind you...   \n",
       "2  2016-06-14 15:19:59  **FLEX 2 BEDROOM WITH FULL PRESSURIZED WALL**L...   \n",
       "3  2016-06-24 07:54:24  A Brand New 3 Bedroom 1.5 bath ApartmentEnjoy ...   \n",
       "4  2016-06-28 03:50:23  Over-sized Studio w abundant closets. Availabl...   \n",
       "\n",
       "       display_address                                           features  \\\n",
       "0  145 Borinquen Place  [Dining Room, Pre-War, Laundry in Building, Di...   \n",
       "1            East 44th  [Doorman, Elevator, Laundry in Building, Dishw...   \n",
       "2     East 56th Street  [Doorman, Elevator, Laundry in Building, Laund...   \n",
       "3  Metropolitan Avenue                                                 []   \n",
       "4     East 34th Street  [Doorman, Elevator, Fitness Center, Laundry in...   \n",
       "\n",
       "   latitude  listing_id  longitude                        manager_id  \\\n",
       "0   40.7108   7170325.0   -73.9539  a10db4590843d78c784171a107bdacb4   \n",
       "1   40.7513   7092344.0   -73.9722  955db33477af4f40004820b4aed804a0   \n",
       "2   40.7575   7158677.0   -73.9625  c8b10a317b766204f08e613cef4ce7a0   \n",
       "3   40.7145   7211212.0   -73.9425  5ba989232d0489da1b5f2c45f6688adc   \n",
       "4   40.7439   7225292.0   -73.9743  2c3b41f588fbb5234d8a1e885a436cfa   \n",
       "\n",
       "                                              photos   price  \\\n",
       "0  [https://photos.renthop.com/2/7170325_3bb5ac84...  2400.0   \n",
       "1  [https://photos.renthop.com/2/7092344_7663c19a...  3800.0   \n",
       "2  [https://photos.renthop.com/2/7158677_c897a134...  3495.0   \n",
       "3  [https://photos.renthop.com/2/7211212_1ed4542e...  3000.0   \n",
       "4  [https://photos.renthop.com/2/7225292_901f1984...  2795.0   \n",
       "\n",
       "            street_address interest_level  \n",
       "0      145 Borinquen Place         medium  \n",
       "1            230 East 44th            low  \n",
       "2     405 East 56th Street         medium  \n",
       "3  792 Metropolitan Avenue         medium  \n",
       "4     340 East 34th Street            low  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Columns stored as float64; every other column keeps generic Python objects.\n",
    "num = {'intent', 'bathrooms', 'bedrooms', 'latitude', 'listing_id', 'longitude', 'price'}\n",
    "\n",
    "for key, column in json.items():\n",
    "    # Look every value up by its record key (the 'intent' column) rather than by row\n",
    "    # position, so a column whose keys are ordered differently cannot be attached to the\n",
    "    # wrong row; a key missing from a column yields NaN instead of a placeholder value.\n",
    "    values = df2['intent'].map(column)\n",
    "    df2[key] = values.astype(float) if key in num else values.astype(object)\n",
    "df2.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Integer codes for the target classes, ordered low < medium < high.\n",
    "INTEREST_LEVEL_CODES = {'low': 0, 'medium': 1, 'high': 2}\n",
    "\n",
    "\n",
    "def changeSign(arr):\n",
    "    \"\"\"Swap a record's textual 'interest_level' for its integer code.\n",
    "\n",
    "    arr: a row Series or dict with an 'interest_level' entry. It is modified\n",
    "    in place and returned; a level outside low/medium/high is left as it is.\n",
    "    \"\"\"\n",
    "    level = arr['interest_level']\n",
    "    if level in INTEREST_LEVEL_CODES:\n",
    "        arr['interest_level'] = INTEREST_LEVEL_CODES[level]\n",
    "    return arr\n",
    "\n",
    "\n",
    "# Encode only the target column; apply(changeSign, axis=1) rebuilt every row as a Series\n",
    "# just to touch one field. df2 is left unchanged and unknown levels are kept as they are.\n",
    "df3 = df2.assign(\n",
    "    interest_level=df2['interest_level'].map(lambda level: INTEREST_LEVEL_CODES.get(level, level))\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Write the encoded frame to disk so it can be reused conveniently next time.\n",
    "df3.to_csv(path_or_buf='RentListingInquries.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>intent</th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>building_id</th>\n",
       "      <th>created</th>\n",
       "      <th>description</th>\n",
       "      <th>display_address</th>\n",
       "      <th>features</th>\n",
       "      <th>latitude</th>\n",
       "      <th>listing_id</th>\n",
       "      <th>longitude</th>\n",
       "      <th>manager_id</th>\n",
       "      <th>photos</th>\n",
       "      <th>price</th>\n",
       "      <th>street_address</th>\n",
       "      <th>interest_level</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>8579a0b0d54db803821a35a4a615e97a</td>\n",
       "      <td>2016-06-16 05:55:27</td>\n",
       "      <td>Spacious 1 Bedroom 1 Bathroom in Williamsburg!...</td>\n",
       "      <td>145 Borinquen Place</td>\n",
       "      <td>[Dining Room, Pre-War, Laundry in Building, Di...</td>\n",
       "      <td>40.7108</td>\n",
       "      <td>7170325.0</td>\n",
       "      <td>-73.9539</td>\n",
       "      <td>a10db4590843d78c784171a107bdacb4</td>\n",
       "      <td>[https://photos.renthop.com/2/7170325_3bb5ac84...</td>\n",
       "      <td>2400.0</td>\n",
       "      <td>145 Borinquen Place</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>b8e75fc949a6cd8225b455648a951712</td>\n",
       "      <td>2016-06-01 05:44:33</td>\n",
       "      <td>BRAND NEW GUT RENOVATED TRUE 2 BEDROOMFind you...</td>\n",
       "      <td>East 44th</td>\n",
       "      <td>[Doorman, Elevator, Laundry in Building, Dishw...</td>\n",
       "      <td>40.7513</td>\n",
       "      <td>7092344.0</td>\n",
       "      <td>-73.9722</td>\n",
       "      <td>955db33477af4f40004820b4aed804a0</td>\n",
       "      <td>[https://photos.renthop.com/2/7092344_7663c19a...</td>\n",
       "      <td>3800.0</td>\n",
       "      <td>230 East 44th</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>cd759a988b8f23924b5a2058d5ab2b49</td>\n",
       "      <td>2016-06-14 15:19:59</td>\n",
       "      <td>**FLEX 2 BEDROOM WITH FULL PRESSURIZED WALL**L...</td>\n",
       "      <td>East 56th Street</td>\n",
       "      <td>[Doorman, Elevator, Laundry in Building, Laund...</td>\n",
       "      <td>40.7575</td>\n",
       "      <td>7158677.0</td>\n",
       "      <td>-73.9625</td>\n",
       "      <td>c8b10a317b766204f08e613cef4ce7a0</td>\n",
       "      <td>[https://photos.renthop.com/2/7158677_c897a134...</td>\n",
       "      <td>3495.0</td>\n",
       "      <td>405 East 56th Street</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10</td>\n",
       "      <td>1.5</td>\n",
       "      <td>3.0</td>\n",
       "      <td>53a5b119ba8f7b61d4e010512e0dfc85</td>\n",
       "      <td>2016-06-24 07:54:24</td>\n",
       "      <td>A Brand New 3 Bedroom 1.5 bath ApartmentEnjoy ...</td>\n",
       "      <td>Metropolitan Avenue</td>\n",
       "      <td>[]</td>\n",
       "      <td>40.7145</td>\n",
       "      <td>7211212.0</td>\n",
       "      <td>-73.9425</td>\n",
       "      <td>5ba989232d0489da1b5f2c45f6688adc</td>\n",
       "      <td>[https://photos.renthop.com/2/7211212_1ed4542e...</td>\n",
       "      <td>3000.0</td>\n",
       "      <td>792 Metropolitan Avenue</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>15</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>bfb9405149bfff42a92980b594c28234</td>\n",
       "      <td>2016-06-28 03:50:23</td>\n",
       "      <td>Over-sized Studio w abundant closets. Availabl...</td>\n",
       "      <td>East 34th Street</td>\n",
       "      <td>[Doorman, Elevator, Fitness Center, Laundry in...</td>\n",
       "      <td>40.7439</td>\n",
       "      <td>7225292.0</td>\n",
       "      <td>-73.9743</td>\n",
       "      <td>2c3b41f588fbb5234d8a1e885a436cfa</td>\n",
       "      <td>[https://photos.renthop.com/2/7225292_901f1984...</td>\n",
       "      <td>2795.0</td>\n",
       "      <td>340 East 34th Street</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  intent  bathrooms  bedrooms                       building_id  \\\n",
       "0      4        1.0       1.0  8579a0b0d54db803821a35a4a615e97a   \n",
       "1      6        1.0       2.0  b8e75fc949a6cd8225b455648a951712   \n",
       "2      9        1.0       2.0  cd759a988b8f23924b5a2058d5ab2b49   \n",
       "3     10        1.5       3.0  53a5b119ba8f7b61d4e010512e0dfc85   \n",
       "4     15        1.0       0.0  bfb9405149bfff42a92980b594c28234   \n",
       "\n",
       "               created                                        description  \\\n",
       "0  2016-06-16 05:55:27  Spacious 1 Bedroom 1 Bathroom in Williamsburg!...   \n",
       "1  2016-06-01 05:44:33  BRAND NEW GUT RENOVATED TRUE 2 BEDROOMFind you...   \n",
       "2  2016-06-14 15:19:59  **FLEX 2 BEDROOM WITH FULL PRESSURIZED WALL**L...   \n",
       "3  2016-06-24 07:54:24  A Brand New 3 Bedroom 1.5 bath ApartmentEnjoy ...   \n",
       "4  2016-06-28 03:50:23  Over-sized Studio w abundant closets. Availabl...   \n",
       "\n",
       "       display_address                                           features  \\\n",
       "0  145 Borinquen Place  [Dining Room, Pre-War, Laundry in Building, Di...   \n",
       "1            East 44th  [Doorman, Elevator, Laundry in Building, Dishw...   \n",
       "2     East 56th Street  [Doorman, Elevator, Laundry in Building, Laund...   \n",
       "3  Metropolitan Avenue                                                 []   \n",
       "4     East 34th Street  [Doorman, Elevator, Fitness Center, Laundry in...   \n",
       "\n",
       "   latitude  listing_id  longitude                        manager_id  \\\n",
       "0   40.7108   7170325.0   -73.9539  a10db4590843d78c784171a107bdacb4   \n",
       "1   40.7513   7092344.0   -73.9722  955db33477af4f40004820b4aed804a0   \n",
       "2   40.7575   7158677.0   -73.9625  c8b10a317b766204f08e613cef4ce7a0   \n",
       "3   40.7145   7211212.0   -73.9425  5ba989232d0489da1b5f2c45f6688adc   \n",
       "4   40.7439   7225292.0   -73.9743  2c3b41f588fbb5234d8a1e885a436cfa   \n",
       "\n",
       "                                              photos   price  \\\n",
       "0  [https://photos.renthop.com/2/7170325_3bb5ac84...  2400.0   \n",
       "1  [https://photos.renthop.com/2/7092344_7663c19a...  3800.0   \n",
       "2  [https://photos.renthop.com/2/7158677_c897a134...  3495.0   \n",
       "3  [https://photos.renthop.com/2/7211212_1ed4542e...  3000.0   \n",
       "4  [https://photos.renthop.com/2/7225292_901f1984...  2795.0   \n",
       "\n",
       "            street_address  interest_level  \n",
       "0      145 Borinquen Place               1  \n",
       "1            230 East 44th               0  \n",
       "2     405 East 56th Street               1  \n",
       "3  792 Metropolitan Avenue               1  \n",
       "4     340 East 34th Street               0  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 49352 entries, 0 to 49351\n",
      "Data columns (total 16 columns):\n",
      "intent             49352 non-null object\n",
      "bathrooms          49352 non-null float64\n",
      "bedrooms           49352 non-null float64\n",
      "building_id        49352 non-null object\n",
      "created            49352 non-null object\n",
      "description        49352 non-null object\n",
      "display_address    49352 non-null object\n",
      "features           49352 non-null object\n",
      "latitude           49352 non-null float64\n",
      "listing_id         49352 non-null float64\n",
      "longitude          49352 non-null float64\n",
      "manager_id         49352 non-null object\n",
      "photos             49352 non-null object\n",
      "price              49352 non-null float64\n",
      "street_address     49352 non-null object\n",
      "interest_level     49352 non-null int64\n",
      "dtypes: float64(6), int64(1), object(9)\n",
      "memory usage: 6.0+ MB\n"
     ]
    }
   ],
   "source": [
    "# Feature engineering starts here. Work on a copy so that changes made to `train`\n",
    "# never leak back into df3, and re-running this cell restores a clean frame.\n",
    "train = df3.copy()\n",
    "train.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>latitude</th>\n",
       "      <th>listing_id</th>\n",
       "      <th>longitude</th>\n",
       "      <th>price</th>\n",
       "      <th>interest_level</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>49352.00000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>4.935200e+04</td>\n",
       "      <td>49352.000000</td>\n",
       "      <td>4.935200e+04</td>\n",
       "      <td>49352.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>1.21218</td>\n",
       "      <td>1.541640</td>\n",
       "      <td>40.741545</td>\n",
       "      <td>7.024055e+06</td>\n",
       "      <td>-73.955716</td>\n",
       "      <td>3.830174e+03</td>\n",
       "      <td>0.383105</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>0.50142</td>\n",
       "      <td>1.115018</td>\n",
       "      <td>0.638535</td>\n",
       "      <td>1.262746e+05</td>\n",
       "      <td>1.177912</td>\n",
       "      <td>2.206687e+04</td>\n",
       "      <td>0.626035</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6.811957e+06</td>\n",
       "      <td>-118.271000</td>\n",
       "      <td>4.300000e+01</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>40.728300</td>\n",
       "      <td>6.915888e+06</td>\n",
       "      <td>-73.991700</td>\n",
       "      <td>2.500000e+03</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>1.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>40.751800</td>\n",
       "      <td>7.021070e+06</td>\n",
       "      <td>-73.977900</td>\n",
       "      <td>3.150000e+03</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>1.00000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>40.774300</td>\n",
       "      <td>7.128733e+06</td>\n",
       "      <td>-73.954800</td>\n",
       "      <td>4.100000e+03</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>10.00000</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>44.883500</td>\n",
       "      <td>7.753784e+06</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.490000e+06</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         bathrooms      bedrooms      latitude    listing_id     longitude  \\\n",
       "count  49352.00000  49352.000000  49352.000000  4.935200e+04  49352.000000   \n",
       "mean       1.21218      1.541640     40.741545  7.024055e+06    -73.955716   \n",
       "std        0.50142      1.115018      0.638535  1.262746e+05      1.177912   \n",
       "min        0.00000      0.000000      0.000000  6.811957e+06   -118.271000   \n",
       "25%        1.00000      1.000000     40.728300  6.915888e+06    -73.991700   \n",
       "50%        1.00000      1.000000     40.751800  7.021070e+06    -73.977900   \n",
       "75%        1.00000      2.000000     40.774300  7.128733e+06    -73.954800   \n",
       "max       10.00000      8.000000     44.883500  7.753784e+06      0.000000   \n",
       "\n",
       "              price  interest_level  \n",
       "count  4.935200e+04    49352.000000  \n",
       "mean   3.830174e+03        0.383105  \n",
       "std    2.206687e+04        0.626035  \n",
       "min    4.300000e+01        0.000000  \n",
       "25%    2.500000e+03        0.000000  \n",
       "50%    3.150000e+03        0.000000  \n",
       "75%    4.100000e+03        1.000000  \n",
       "max    4.490000e+06        2.000000  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.describe()\n",
    "# The summary table below suggests that bathrooms, bedrooms and longitude may contain outliers."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAENCAYAAAAfTp5aAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3Xl8XGd97/HPb0braLVsWd7tJMZZWwg1SQjcBEhDEghJb3tblpIm0N5cbtuUcgstbSnhAr1wb2kJl1IgJTQJJIGQ5jYhLFnIRkjiYCdOgrd4lS0vsmwt1i7NzHP/OGdGR6OZkSyNpDPj7/v18kuzn+eMxl8983ue8xxzziEiIsUvMt8NEBGRwlCgi4iUCAW6iEiJUKCLiJQIBbqISIlQoIuIlAgFuohIiVCgi4iUCAW6iEiJKJvLjS1atMitWbNmLjcpIlL0Nm3adMw51zzZ4+Y00NesWcPGjRvncpMiIkXPzFqn8jiVXERESoQCXUSkRCjQRURKhAJdRKREKNBFREqEAl1EpEQo0EVESoQCXUSkRCjQJ3H3hv38ztefne9miIhMSoE+idfae9l66MR8N0NEZFIK9EkknSPh3Hw3Q0RkUpMGupl928yOmtmvArc1mdmjZrbT/7lgdps5fxJJRzKpQBeR8JtKD/124MqM2z4J/Mw59zrgZ/71kpRIqocuIsVh0kB3zj0NdGbcfC1wh3/5DuC3Ctyu0EgkHc6BU6iLSMhNt4be4pw7DOD/XFy4JoVLwi+3JFR2EZGQm/VBUTO70cw2mtnGjo6O2d5cwaXKLSq7iEjYTTfQ281sKYD/82iuBzrnbnXOrXfOrW9unvSEG6ET93vmyeQ8N0REZBLTDfQHgev9y9cDDxSmOeGTmuGiHrqIhN1Upi3eAzwHnGlmbWb2h8AXgcvNbCdwuX+9JKmGLiLFYtJzijrn3p/jrssK3JZQSqRLLgp0EQk3HSk6CQ2KikixUKBPQj10ESkWCvRJJDQoKiJFQoE+ibgGRUWkSCjQJ5HUPHQRKRIK9EnEVXIRkSKhQJ9E0qnkIiLFQYE+ifQsF/XQRSTkFOiT0JGiIlIsFOiTUKCLSLFQoE9CJRcRKRYK9EkkNCgqIkVCgT6JeCLVQ5/nhoiITEKBPolUqUUlFxEJOwX6JDQoKiLFQoE+Ca22KCLFQoE+Ca2HLiLFQoE+iURCJRcRKQ4K9EkkNCgqIkVCgT6JsfXQ57khIiKTUKBPIqlZLiJSJBTok4jr0H8RKRIK9DyCUxXVQxeRsFOg5xGcqqgeuoiEnQI9j4R66CJSRBToeSjQRaSYKNDziCdVchGR4qFAz2P8oOg8NkREZAoU6HkEe+hay0VEwk6BnkewzKLVFkUk7BToeWhQVESKyYwC3cw+ZmZbzOxXZnaPmVUVqmFhkNCgqIgUkWkHupktB/4MWO+cOw+IAu8rVMPCQD10ESkmMy25lAHVZlYGxIBDM29SeGhQVESKybQD3Tl3EPgSsB84DPQ45x4pVMPCQIOiIlJMZlJyWQBcC5wGLANqzOyDWR53o5ltNLONHR0d02/pPIgnNA9dRIrHTEouvwnsdc51OOdGgfuBizMf5Jy71Tm33jm3vrm5eQabm3vBHrpKLiISdjMJ9P3ARWYWMzMDLgO2FaZZ4TDu0H+VXEQk5GZSQ98A3Ae8CLzqv9atBWpXKCQ0KCoiRaRsJk92zt0M3FygtoSOBkVFpJjoSNE8goOiOrBIRMJOgZ7HuEFRzXIRkZBToOeh9dBFpJgo0PPQSaJFpJgo0PPQof8iUkwU6HkkNA9dRIqIAj0PrbYoIsVEgZ5HQof+i0gRUaDnkVTJRUSKiAI9j9SgaDRiJJTnIhJyCvQ8Ur3yimhEPXQRCT0Feh6pHnp51DQoKiKhp0DPIzUQWlEW1aCoiISeAj2PhL+AS0XUVHIRkdBToOeRGgitKIuohy4ioadAzyOZ
rqFHVEMXkdBToOcRDwS6VlsUkbBToOeRTA+KqocuIuGnQM8jdcYibx76PDdGRGQSCvQ8UgOh5WWmQVERCT0Feh6JZJJoxIiYDiwSkfBToOeRSELUjGjENCgqIqGnQM8j1UOPqocuIkVAgZ5HIumttBiJKNBFJPwU6HkknSNiXtlFJRcRCTsFeh7xZJKyaMRbD109dBEJOQV6HokkRMwruSjPRSTsFOh5JJJJyiJG1FDJRURCT4GeR3pQVLNcRKQIKNDzSB9YFNF66CISfgr0PBKOsXnoKrmISMjNKNDNrNHM7jOz7Wa2zczeXKiGhUEy6QLz0Oe7NSIi+ZXN8PlfAX7qnPsvZlYBxArQptCIJ5P+of8aFBWR8Jt2oJtZPXAJcAOAc24EGClMs8IhkYSIDv0XkSIxk5LL6UAH8G9m9pKZfcvMagrUrlBITVvUoKiIFIOZBHoZ8Ebg686584F+4JOZDzKzG81so5lt7OjomMHm5l7CBXroKrmISMjNJNDbgDbn3Ab/+n14AT+Oc+5W59x659z65ubmGWxu7qUPLNKh/yJSBKYd6M65I8ABMzvTv+kyYGtBWhUSiaQjmj70X4EuIuE201kuNwF3+TNc9gAfmnmTwiORdJRFIhoUFZGiMKNAd85tBtYXqC2hk0g6KsvGFudyzmFm890sEZGsdKRoHsFBUUArLopIqCnQ8xgbFE1dV6KLSHgp0PMIrocOOlpURMJNgZ7H2Hro5l9XoItIeCnQ80j4i3NF/R66Di4SkTBToOeRCvRIalBUPXQRCTEFeh4Jl9FDV6CLSIgp0PNIpk5Bp5KLiBQBBXoe6fXQ0yWXeW6QiEgeCvQ80uuhp+ahq4cuIiGmQM8jvR66BkVFpAgo0PPInOWiQVERCTMFeh6ahy4ixUSBnkdq2mL60H/10EUkxBToeaR76FptUUSKgAI9j9QZi7TaoogUAwV6Ds45ko7xh/6rhi4iIaZAzyHVG9eh/yJSLBToOaRmtOjQfxEpFgr0HMb10HVgkYgUAQV6DulAN5VcRKQ4KNBzCPbQ00eKquQiIiGmQM8h26CoVlsUkTBToOcQHBTVaosiUgwU6DlkK7loUFREwkyBnoMGRUWk2CjQc9CgqIgUGwV6DtkHRRXoIhJeCvQcsh76rx66iISYAj2HcYf+64xFIlIEFOg5xBNZSi7qoYtIiCnQc0iFd9TG1nJJ6MAiEQmxGQe6mUXN7CUze6gQDQqLdA09akT8d0mDoiISZoXooX8U2FaA1wmVrPPQVXIRkRCbUaCb2Qrg3cC3CtOc8Mi2fK4GRUUkzGbaQ78F+EsgZ3XZzG40s41mtrGjo2OGm5s7wUA3nYJORIrAtAPdzK4GjjrnNuV7nHPuVufceufc+ubm5ulubs6NX5xLPXQRCb+Z9NDfAlxjZvuA7wHvMLPvFqRVIRBXyUVEisy0A90599fOuRXOuTXA+4DHnXMfLFjL5lkyMCianuWikouIhJjmoeeQ9dB/zUMXkRArK8SLOOeeBJ4sxGuFRdb10NVDF5EQUw89h9SgaJkGRUWkSCjQc0iFdyQwKKoeuoiEmQI9h8S4QVGthy4i4adAzyE4bTH1U4f+i0iYKdBzSGYGuplmuYhIqCnQc0j10Mv8QI9EVEMXkXBToOeQCu/IuB66Al1EwkuBnkNiQg9dgS4i4aZAzyE4bRG8WrpKLiISZgr0HILTFlM/1UMXkTBToOeQOW0xoh66iIScAj2H7NMWFegiEl4K9BzimSWXiOahi0i4KdBzSDqH2digqOahi0jYKdBziCddesoiqOQiIuGnQM8hmXTpddDBn4euHrqIhJgCPYdElh66VlsUkTBToOcQT7p0/RxSg6IKdBEJLwV6Dknn0lMWAcw0D11Ewk2BnsOEQdGITkEnIuGmQM8hc1A0akZCeS4iIaZAzyGzhx6JaFBURMJNgZ5DMnNQVPPQRSTkFOg5JNzEHrrmoYtImCnQc5gwbVHz0EUk5BToOSQn
zHJRD11Ewk2BnkM8y6H/6qGLSJiVzXcDwiqZ9A4sunvDfgDae4boHR5NX8/mAxeumqvmiYhMoB56DpnTFs1AFRcRCTMFeg5JN35QNGKmQBeRUFOg5xBPTOyhay0XEQmzaQe6ma00syfMbJuZbTGzjxayYfMt4TIGRc3QmKiIhNlMBkXjwF845140szpgk5k96pzbWqC2zatk0lFZPvb3zquhK9FFJLym3UN3zh12zr3oX+4FtgHLC9Ww+TZh2qKWzxWRkCtIDd3M1gDnAxsK8XphkLkeekSzXEQk5GYc6GZWC/w78OfOuRNZ7r/RzDaa2caOjo6Zbm7OTBwUVQ9dRMJtRoFuZuV4YX6Xc+7+bI9xzt3qnFvvnFvf3Nw8k83NqeSEQVH10EUk3GYyy8WA24Btzrl/KlyTwiGedJRF1UMXkeIxkx76W4DrgHeY2Wb/37sK1K55l3nGooihaYsiEmrTnrbonHsGsEkfWKQy10M3Mxxzk+hd/SNEo0Z9VfmcbE9ESoOOFM0hnsg49J+566H/yd0v8tf3vzo3GxORkqHVFnNIZuuhz1ENva1rkM7+kTnZloiUDgV6DvHkxHnoc9VD7x4Y4cRQyVazRGSWKNBzyBwUDfbQX2nrJmLGecsbCr7dRNJxYigOwNBogqryaMG3ISKlSTX0HDLXQw/20H++8xg/3zk7B0mdGBxNX24/MTQr2xCR0qRAzyGZcZJo83vrSefoH4kzMJKYle12BwL9cI8CXUSmTiWXLBJJx2BGuSOV7c7BwHBiXH29kLoHxgZD1UMXkZOhQM/icM8g8aRjxYLq9G2pevpIPMlIIgkJL/gLHezBHvoR9dBF5CSo5JLF/uMDAKxZWJO+LVVy6RuOp28bHC182aVnQCUXEZkeBXoWrZ1eoK9qiqVvS3XEg4HeH7hcKKmSS1NNhUouInJSFOhZtB4foDxqLGscK7lk66HPxsBoz6D3+utaajmiQBeRk6BAz2J/Zz8rFsQmHFgE0Dc0VhKZlR764Ah1VWUsb4zRrpKLiJwEBXoWrccHxpVbYA576AOjNMbKWdJQSXvvMAkt8SgiU6RAz+CcY//xAVYvHB/oYzX0sRAfGJmNHvooDdXlLGmoJpF0HO8bLvg2RKQ0KdAzdA2M0jscn9BDjzDWQ68uj1JRFpm1QdHG6gqW1FcBqI4uIlOmQM/QerwfgNWBKYsAFqihxyqi1FREZ6Xk0j04SkOsPB3omrooIlOlA4sy7O9MzUHPLLl4id4/kqC2soxE0lsCoNB6BkZprC5nSYMX6Jq6KCJTpR56hlb/oKKVEwZFvZ99Q3Gvh15Z+B66c47uQW9QdGFNBeVR09GiIjJlCvQMrccHWFJfNWHZ2vSh/4kkNRVlxCrKCl5D7xuOk0g6GqsriESMxXVVCnQRmTKVXDLs7+xnVUa5BcZ66ACxyijJpEv30J1z/MuTuzGD91+watrb7vYP+2+IeecSXdJQpUFREZky9dAztB4fYHXTxEAPnuyipqKMWGUZw/Ek8USS3qE4B7sHx62R3jMwyu998zl2He2b8rZ7/IW5Gqv9QK9XoIvI1CnQAwZG4hztHZ4wBx3G5qEDxCqixCqi/nMSdPhzxXcf7U8/ZtP+Tl7Y28mTO45OefvpQI9VANBS75Vc5upcpiJS3BToAakZLqsypizC2JGiADWVZdRUeNWqfv+PAMDeY/3EE0kAdhzp83/2Tnn76ZKL30Nf2lDFwEgifUo6EZF8FOgB+455gZ695DJ2OVYRJVYZ6KH3emWRkUSSA12DALzW3jvu51R0D3orLTb6NfQzFnt/WLYfPnEyuyEipygFesCWQz1EI8a6lroJ91lGDT3dQx+O09E7THnUu3/30fE989fa+0hOcT2WzB76G1YuAOClA93T2R0ROcWc0rNcHt/ezsVnLEpPUXxpfzdnLamjuiI64bGZs1xSi2Z5PfRh1i6uY9vhE+zq6ONtiWZ2dfTRVFNBZ/8IbV2DWWfOZOoZ
HKWqPJJuz09/dYSFNRU8uPkQ9VXlOZ/3gQunP7NGRErHKdtD33ygmw/fvpF7XtgPeKeT23ygm/NXNWZ9fGqWiwFV5VFifg+9s3+EE0NxVi2oprmukl1H+2jtHGAknuSq85YAsMMvu/QMjnLvxgPjBjmf2H6Uf3xkBzC2jkvQyqYYBzoHNDAqIpM6ZQP9ie3e7JMndnhTDXd39NE3HOd8v8yRKdVDj1VEiZgRjRhV5ZH02i/NdVWsba5l19E+XvPLLVf/+jJgrI7+7Wf28pf3vcIv93WlX/erj+/kq4/v4kjPEN3+0rlBK5ti9A7H0zNgRERyOWUC/TvP7eOl/WNB+tRrXpA/v+c4gyOJ9H05e+j+aouxyrEqVU1FGQe7vUHQxXWVrF1cy+6OPrYf6cXMe60VC6rT9fSHtxwZ97OjdzhdH390W3t66dygVQu8Uk1qBo6ISC6nRKDvOtrL3z2whc88uAXwyiQvt3WzfvUCRuJJnt97nJf2d9NQXc5piyZOWYSxWS41gfp6rCJK0kHUjAU1FZzRXEPvUJxf7DrGmoU1VJVHObOljtfae2k93s/2I72UR42HtxzBOcfPtrXjHNRVlvHY1vb0yS2CljRUUR41DsxRoD+z8xh3b9g/J9sSkcIqyUC/+YFf8fEfvJyuO3/nuVYAXm7rYfOBbn6+swPn4BNXnElVeYSndnTw0v5u3rCycdxslqDU7anaefDywtoKohFj7WJvdszG1i7WtdQCsG5JHbs7+njolcMAfOTSM2jrGmTLoRM8tq2d5Y3VvPdNK3lu93GOnBiaUEOPRozljdVz0kMfGk3wP+7dzN/+x6vsPInpliISDjMKdDO70sx2mNkuM/tkoRqVS+psQkF3PreP937zOboHvDncT7/WwR3PtXLfpjYe3nKE3qFR7tvUxjvPaaG2sow7n93HUzs6WBArZ/2aJt58+kIe3nKE14725iy3wNigaCzQQ6/x56I311UCsHZxbfq+M/2pj2e21DGacNz+7D7OWVrPDRevIWLwwOaD/HznMS4/p4XLz2lhJJGkx18LPdPKphiHeobSBy3Nlu8+38rR3mHKIxFueWznrG5L5FQy1anLMzXtQDezKPA14CrgHOD9ZnZOoRqWKZ5I8vEfvMIl//AE//jIDpxzPLHjKDc/uIUNezu56Z6XGBxJ8JkHt7BmYYyzltTxuYe2cdeG/fSPJPjjt6/ld964nIdeOczjO45yybpmohHj0nXNHO4Zwjk4f1X2AVFvf72fNZUTe+iL/UBvqa+k1r9/3RIv0FNz2jt6h7ni3CUsrK1k/Zombn92H8PxJO88p4XfWL2ABX6QZ9bQAVYuiJFIunEnu2g/McQPNh4YV4rZ3dHHPS/sp2fg5AdQB0bifOOp3Vx8xkJuvOR0fvTqYbbN8QFNHb3DbGrt0oweKRo9A6MMjY4toz00muCBzQc5Fjh15BPbj3LlV56mo3f2Tyc5k3noFwC7nHN7AMzse8C1wNZCNCxoJJ7kY9/fzI9ePczrVzTw1cd3cbhniIe3HOGsJfW8d/0KPvPDrVzzz8+w51g/t3/oTcQqyvi9bz7H//7pdl6/spE3rGyktrKMO55rZWQgyaXrmgF425mL4Ydek9+w4iR76BXje+hmxhnNNbzc1pPuoZ/eXEM0YiSSjivOawHginOX8MLeTuqrynjTaU2URSO8/azF3P/iwQk1dCB9OrxHt7Vz6bpmOvtGeOjVQ4wmHC+3ddMYKyfp4MuPvcZIPMkXfryN/3bpGbTUV7G7ow/n4K1rF7F+zQKO9Ayx/UgvFWXGWUvqWdpQhZlxx7OtHOsb4ZvXrWNtcx13PLePLz/6Grf+wXoARv1vEMPxJI3V5cQqovQMjtJ6fIDRRJJVTTEaYuW80tbDhj3HiVWUcfHahaxZWMP2I71sP3yCloYqfm15A+WRCJvbutnT0cfaxbWcu6yB+19s45bHdtI3HOeC05q4+T3nUBGNsKm1i9FEkvNXLeB1
LbW0Hh9g66ETxCqinLe8gbqqMja1dvFKWw+rF8a44LQmGqrL2dPRz5GeIVY0VXPaohqG40n2dvTTOxRn9cIYyxqrOdY3zM72PpLOsXZxLUvqqzjWP8zBrkGqK6KsWBCjqixCe+8wR3qGWBArZ1ljNWZwuHuIroERFtdX0VJXyUgiyaHuIYZGEyxtqKKppoITQ3EOdQ9iBssaq6mrLKOzf4TDPUPEKqIsa6ymPBrhaO8QHb3DLIhV0JI69WDPEN2DI7TUV7GotpLheIJD3YMMjSbTr98zOMrB7kEiZunXP9Y3zKGeIWoroyxvjFEeNQ73DNHRN8zCmgqW+ueqbesaoHtwlKUNVSxtqKZ/JM6BzgGG40lWLKhmUU0lx/tH2N85QHnUWNUUo66qnEPdgxzoGqCxuoLVC2OURY39xwc4cmKIJfVVrFoYYySeZE9HP10DI6xqirGyKUbXwAi7jvYxEk9yRnMtSxuqONg9yGvtfVSVRzizpY6GWDm7jvax62gfzbWVnL20nmjU2HLwBPs7+zltUS3nLKunbyjO5gNddPQOc86yBs5dVk/r8QF+ua+T0USS9aubOGNxDZsPdLNhTyf11eW8Ze1CWuqqeHpnBxv2dnJGcy3vOGsxBvzo1cO80tbNhact5IrzltB6rJ/7NrVxoGuAq85bypXnLeGJHUe589lW4skk1120mresXcRtz+zlB5vaWFhTwZ++Yy2rmmL8zx9uZe+xfuqryvjkVWdzsHuArz2xm7OX1o8L/tkyk0BfDhwIXG8DLpxZcyZyznHTPS/y8JZ2rjpvCW9du4iayjLu29RGrCLK1b+2lIqyKBed3sTzezp55zktXkgDv33+cu5/6SDXv3k14JVE3rp2Ec/sOsYlfqCvWVTD6oUxohHLWu5IifqjosEeeupyc533n/DuDfuJRoyoGc/v6UxPT2yKVZBwjk37unixtZsrzm3hcw9t5e1nLaY86n1Jeuc5Ldz/4kEWxMbX0AHqq8u57OzF/GLXMW57Zq+3L821XP36pTy6tZ0v/GQ7AOcuq+ei0xfyi13H+IeHvbntEQPD+MZTu7PuV2VZhLKIMTia4NJ1zfzG6ibu3rCfC09r4pGt7bzub39M0pE+kColYlDob5GXrmvmP71uEV97Yhfv/r/PFOx1zSCz05+t/VO9LfP1sj0m9Uf8ZG9LfRMMvv5UX2sq7Z/t9yLb68/ktpnK9pox//SRn3torO+5rKGKh7e081n/trrKMpYvqOazD21N33be8npi0XI+43cAK6IR3vemlexs7+PTD3gTLlYvjPFPv/d67t14gL/5f68C8L43reQz15w74RwLs8Gm+/XWzH4XuMI590f+9euAC5xzN2U87kbgRv/qmcCOabZ1EXBsms8tVtrnU4P2ufTNdH9XO+eaJ3vQTHrobcDKwPUVwKHMBznnbgVuncF2ADCzjc659TN9nWKifT41aJ9L31zt70xmufwSeJ2ZnWZmFcD7gAcL0ywRETlZ0+6hO+fiZvanwMNAFPi2c25LwVomIiInZUarLTrnfgz8uEBtmcyMyzZFSPt8atA+l7452d9pD4qKiEi4lOSh/yIip6KiCPS5XmJgPpnZSjN7wsy2mdkWM/vofLdprphZ1MxeMrOH5rstc8HMGs3sPjPb7v++3zzfbZptZvYx/3P9KzO7x8yq5rtNhWZm3zazo2b2q8BtTWb2qJnt9H/mPix9BkIf6HO9xEAIxIG/cM6dDVwE/EmJ72/QR4Ft892IOfQV4KfOubOA11Pi+25my4E/A9Y7587Dm0zxvvlt1ay4Hbgy47ZPAj9zzr0O+Jl/veBCH+gElhhwzo0AqSUGSpJz7rBz7kX/ci/ef/Ll89uq2WdmK4B3A9+a77bMBTOrBy4BbgNwzo04506Fk8eWAdVmVgbEyHLsSrFzzj0NdGbcfC1wh3/5DuC3ZmPbxRDo2ZYYKPmAAzCzNcD5wIb5bcmcuAX4S2B2l5QMj9OBDuDf/DLT
t8ws+2L8JcI5dxD4ErAfOAz0OOcemd9WzZkW59xh8DptwOLZ2EgxBHq2BcpLfmqOmdUC/w78uXNubpc9nGNmdjVw1Dm3ab7bMofKgDcCX3fOnQ/0M0tfw8PCrxtfC5wGLANqzOyD89uq0lIMgT6lJQZKiZmV44X5Xc65++e7PXPgLcA1ZrYPr6T2DjP77vw2ada1AW3OudS3r/vwAr6U/Saw1znX4ZwbBe4HLp7nNs2VdjNbCuD/PDobGymGQD+llhgw79RItwHbnHP/NN/tmQvOub92zq1wzq3B+/0+7pwr6Z6bc+4IcMDMzvRvuoxZWHo6ZPYDF5lZzP+cX0aJDwQHPAhc71++HnhgNjYyoyNF58IpuMTAW4DrgFfNbLN/29/4R+VKabkJuMvvqOwBPjTP7ZlVzrkNZnYf8CLebK6XKMEjRs3sHuBtwCIzawNuBr4I3Gtmf4j3h+13Z2XbOlJURKQ0FEPJRUREpkCBLiJSIhToIiIlQoEuIlIiFOgiIiVCgR4CZubMbK1/+Rtm9nfz3aYgM7vdzD4/D9v9z2Z2wMz6zOz8ud5+KfLfy9On+dz05zTLfU+a2R/NrHUyU6Gfhx4m/pGMy4Blzrljgds3462Wd5pzbt9MtuGc+8hMnl9ivgT8qXNuVg7COBU552rnuw0ye9RDP3l7gfenrpjZrwHV89eckrYaKIqDyPzVA0XmlQL95H0H+IPA9euBO4MPMLNKM/uSme03s3a/jFIduP8TZnbYzA6Z2Ycznpsub5jZAjN7yMw6zKzLv7wi8NgnzexzZvYLM+s1s0fMbFG2RvsnULg6cL3MzI6Z2Rv96z8wsyNm1mNmT5vZuTle5wYzeybjtmDJKO++ZzwvYmafMrNW/4QAd5pZg/8afXhHBr9sZrtzPP9iM/ul3+ZfmtnF/u1vN7NXA497zMxeCFx/xsx+y7+8z8w+bmav+K/zfQucdMHMrjazzWbWbWbPmtmvB+7bZ2Z/ZWavAP3ZQt3fl1v83/Uh/3Jl4P5r/dc/YWa7zexK//YmM/s3/zldZvYfU3z/b/ff80f9z8RTZrZ6Or+rfJ/THFbn+iya2TXmndii2//cnp3xPn7C/x30m9ltZtZiZj/xX+sxC5wQwswu8n8X3WZ47pbBAAAGvElEQVT2spm9LXDfDWa2x3/eXjP7/Sm0u3Q45/Rviv+AfXgLDO0AzsYLnAN4PUkHrPEfdwve2g1NQB3wQ+AL/n1XAu3AeUANcLf/3LX+/bcDn/cvLwR+B2/d6DrgB8B/BNrzJLAbWIf3LeFJ4Is52v5pvMW+UtffDWwPXP+wv41Kv/2bA/cF23QD8EzGawfbn3Pfs7Tpw8AuvKVka/EWa/pOttfN8twmoAtvmYQyvG9NXf57VgUMAov8+47gLehW579Pg8DCwO/0BbxSWhPe2iIf8e97I94iShf6v+vr/cdXBp67GW/xuOoc7fws8DzecqnNwLPA5/z7LgB6gMvxOlfLgbP8+34EfB9YAJQDl07x/b8d6MVba70S7yQaz5zs74pJPqdZ9vNJcnwW/dv6/f0sx1smeRdQEXgfnwda/PfgKN7yAOf7+/A4cLP/2OXAceBd/nt2uX+92W/nCeBM/7FLgXPnOzfmNKPmuwHF9I+xQP8U8AX/Q/8oXmg4YA3ecr/9wBmB570Zb5U5gG8TCF3/w5410LNs/w1AV+D6k8CnAtf/GO8MONmeu9b/jx7zr98FfDrHYxv9NjVktok8gTLZvmfZzs+APw5cPxMYBcqCr5vjudcBL2Tc9hxwg3/558Bv45316RHgXv/39XbglYzf6QcD1/8P8A3/8tfxwzdw/w7GwnUf8OFJPjO7gXcFrl8B7PMvfxP4cpbnLMVbF35Blvtyvv+B39X3AvfVAglg5cn8rib7nGZpV87PIvB3wL2B+yLAQeBtgffx9wP3/zvessKp6zfhd2SAvyLwR9+/7WG8P7Y1QDdeJyjrH9hS/6e63/R8B3gab13nOzPu
a8brUW8ySy/lbng9PPB6gsF1v1tzbcTMYsCX8YIo9ZWzzsyizrmEf/1I4CkDeP+BJ3DO7TKzbcB7zOyHwDV4PaDUaf7+Hm/BoGbGTjKxCK8HOVWT7XumZYzf/1a8P44teP/h88l8bur5qZOfPIW3QFKbf7kLuBQY9q8HZb6Hy/zLq4HrzeymwP0VgfshcPIV/+v9N/2rP3fOXZWlna2B568Esi26thLodM51ZblvKtJtcs71mVmnv83giWIK9jkNyPVZHPceOOeSZnaA8SeqaQ9cHsxyPfVaq4HfNbP3BO4vB55wzvWb2XuBjwO3mdkv8E7nuH0KbS8JqqFPg3OuFW9w9F14ZYKgY3gfwHOdc43+vwY3NrvgMOPXd1+VZ1N/gddrvdA5lzplGWQ/6cdU3INXmrgW2Oqc2+Xf/gH/tt8EGvC+aeTaTj9eEHgPMFsSuG+yfc90CO8/aMoqvFX42rM/PO9zU89P/SFIBfol/uWn8AL9UiYGei4HgL8P7Eujcy7mnLsn8Jj06nbOubucc7X+v6tytHMVY+v5HwDOyLHdJjNrzHJfvvc/ZWXg/lq8kkrmOQQK+TmdzLj3wLy/ICuZ/I92NgfweujB30mNc+6LAM65h51zl+N9y9kO/OsM2l10FOjT94fAO5xz/cEbnXNJvA/Rl81sMXgnxzWzK/yH3AvcYGbn+D3wm/Nsow7vP123mTVN8tip+B7wTuC/49VEg9sZxqtFxoD/lec1XgbONbM3+IOHn0ndMYV9z3QP8DHz1rqv9bf7fedcfAr78mNgnZl9wLwB3vfinUT8If/+Z/H+GF6AV5rZghcqF+J9u5qKfwU+YmYXmqfGzN5tZnVTfD54+/gpM2v2Bwk/DaRO3nEb8CEzu8y8AeLlZnaW805R9hPgX8wbGC83s9Qf85zvf8C7zOyt5i3L+zlgg3Mu2Dsv9Od0MvcC7/b3sxyvozKM9zs6Wd/F+5Z5hZlFzazKzN5mZiv8gdRrzDuV3zDQh1duOmUo0KfJObfbObcxx91/hTfo87yZnQAewwsXnHM/wRuMetx/zON5NnML3gDTMbxBo5/OsM2H8erMF+MNuKXcifeV+CDeSRaez/Mar+EN9D0G7ASeyXhIzn3P4tuMla/2AkN49dKp7Mtx4Gq8cDiON9B2tfOPD/D/0L4IbHHeycXB2/dW59yUzhbj/37/K/DPeCWbXXg17JPxeWAj8Arwqt+mz/uv/wLeGuhfxittPcVYT/Y6vPGE7XiDhH/uP2ey9x+8P9Y3452o+DeAXDM9CvU5zcs5twP4IPBVvM/ye4D3BH4vJ/NaB/C+Tf4N3jlZDwCfwMuyCN7n4RDevl+KV8s/ZWg9dJESYma3453a7lPz3RaZe+qhi4iUCAW6iEiJUMlFRKREqIcuIlIiFOgiIiVCgS4iUiIU6CIiJUKBLiJSIhToIiIl4v8DsW5EO9z4yN0AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7fc0a69eb7b8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "#接下来看看 各个特征的分布\n",
    "fig = plt.figure()\n",
    "sns.distplot(train.bathrooms.values, bins=30, kde=True)\n",
    "plt.xlabel('Median value of owner-occupied homes', fontsize=12)\n",
    "plt.show()\n",
    "#bathrooms，price，bedrooms 明显数据有不正常的，可以考虑去掉\n",
    "#感兴趣越低的越多。。。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "bathrooms\n",
      "2.0\n",
      "2.0\n",
      "2.0\n",
      "2.0\n",
      "2.0\n",
      "2.5\n",
      "3.0\n",
      "3.0\n",
      "3.0\n",
      "3.5\n",
      "10.0\n",
      "bedrooms\n",
      "3.0\n",
      "4.0\n",
      "4.0\n",
      "4.0\n",
      "4.0\n",
      "4.0\n",
      "4.0\n",
      "4.0\n",
      "4.0\n",
      "5.0\n",
      "8.0\n",
      "price\n",
      "6895.0\n",
      "7000.0\n",
      "7390.0\n",
      "7800.0\n",
      "8000.0\n",
      "8895.0\n",
      "9995.0\n",
      "11000.0\n",
      "13000.0\n",
      "15500.0\n",
      "4490000.0\n"
     ]
    }
   ],
   "source": [
    "for i in ['bathrooms','bedrooms','price']:\n",
    "    print(i)\n",
    "    for j in np.arange(0.95,1, 0.005):\n",
    "        print(train[i].quantile(j))\n",
    "#通过下面的数据 可以认为 在0.995分位数之下的数据才是正常的数据,其他删除"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(48964, 16)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train = train[train['bathrooms'] <= 3.5]\n",
    "train = train[train['bedrooms'] <= 5.0]\n",
    "train = train[train['price'] <= 15500.0]\n",
    "train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 48964 entries, 0 to 49351\n",
      "Data columns (total 16 columns):\n",
      "intent             48964 non-null object\n",
      "bathrooms          48964 non-null float64\n",
      "bedrooms           48964 non-null float64\n",
      "building_id        48964 non-null object\n",
      "created            48964 non-null object\n",
      "description        48964 non-null object\n",
      "display_address    48964 non-null object\n",
      "features           48964 non-null object\n",
      "latitude           48964 non-null float64\n",
      "listing_id         48964 non-null float64\n",
      "longitude          48964 non-null float64\n",
      "manager_id         48964 non-null object\n",
      "photos             48964 non-null object\n",
      "price              48964 non-null float64\n",
      "street_address     48964 non-null object\n",
      "interest_level     48964 non-null int64\n",
      "dtypes: float64(6), int64(1), object(9)\n",
      "memory usage: 6.4+ MB\n"
     ]
    }
   ],
   "source": [
    "train.info()\n",
    "#直接去掉了400 条？？ 看来可能存在误删的数据，这个可以通过之后的 测试误差来恢复数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "#building_id 类别太多，不能能用独热，可以直接删， create 可以根据月份简单做一个分类, display_address列也很多->删,manager_id, photos,street_address 也直接删掉\n",
    "#features看来得好好处理下\n",
    "\n",
    "#通过下面的输出，发现有些特征没意义，直接删掉\n",
    "train = train.drop([\"intent\", \"description\",\"building_id\",\"display_address\",\"manager_id\",\"photos\",\"street_address\"], axis = 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add(arr):\n",
    "    value = arr[3]\n",
    "    for ele in value:\n",
    "        if ele not in features:\n",
    "            dic[ele] = 1\n",
    "        else:\n",
    "            dic[ele] += 1\n",
    "        features.add(ele)\n",
    "def change(arr):\n",
    "    value = arr[3]\n",
    "    for ele in value:\n",
    "        if ele in features:\n",
    "            arr[ele] = 1\n",
    "    \n",
    "    return arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(48964, 24)\n",
      "========================\n",
      "{'Dogs Allowed', 'Dishwasher', 'No Fee', 'Cats Allowed', 'Laundry in Building', 'Pre-War', 'Laundry in Unit', 'Dining Room', 'High Speed Internet', 'Fitness Center', 'Doorman', 'Hardwood Floors', 'Elevator', 'Roof Deck', 'Outdoor Space'}\n"
     ]
    }
   ],
   "source": [
    "import threading\n",
    "import math\n",
    "\n",
    "#print(features)\n",
    "#...这也太多了把，算了 还是直接删掉把, 实际 可能某些特性出现次数很多,这时候这个特性可以保留下来 作为一个特征交给模型去处理,试试\n",
    "#貌似 把出现次数定位限定到一定范围 对数据 的统计更有用,其他字符串的数据 也可以这么处理,根据出现次数  确定 要不要作为一个特征处理\n",
    "#可以理解 对公寓 敢不敢兴趣, 决定因素 是 地段,能否养宠物 等因素,而这些因素 可能都是 文字取描述的,可见 数据的预处理 对模型的性能有决定性作用\n",
    "\n",
    "features = set()\n",
    "dic = {}\n",
    "train2 = train.copy()\n",
    "\n",
    "#1.统计特征出现次数,\n",
    "train2.apply(add, axis=1)\n",
    "\n",
    "#2.确定了 哪些特征被选出来, 出现次数 为 1000的话 特征太多了，为了训练模型简单， 尽量 减少特征到 30左右\n",
    "for ele in dic:\n",
    "    if dic[ele] <= 3500:\n",
    "        features.remove(ele)\n",
    "    else:\n",
    "        train2[ele] = 0\n",
    "\n",
    "print(train2.shape)\n",
    "        \n",
    "print(\"========================\")\n",
    "print(features)\n",
    "\n",
    "train2 = train2.apply(change, axis = 1)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "train2 = train2.drop([\"features\", \"created\", \"listing_id\"], axis = 1)\n",
    "train2.to_csv('FE_RentListingInquries.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>created</th>\n",
       "      <th>features</th>\n",
       "      <th>latitude</th>\n",
       "      <th>listing_id</th>\n",
       "      <th>longitude</th>\n",
       "      <th>price</th>\n",
       "      <th>interest_level</th>\n",
       "      <th>Dining Room</th>\n",
       "      <th>...</th>\n",
       "      <th>Chelsea</th>\n",
       "      <th>** SPRAWLING SUNDRENCHED 2BR HOME * NO FEE! * WALLS OF WINDOWS * CHEF INSPIRED EAT-IN KITCHEN * 2 BLKS TO TRAIN **</th>\n",
       "      <th>Lot's of Light</th>\n",
       "      <th>2 Full Bathrooms</th>\n",
       "      <th>attached parking garage</th>\n",
       "      <th>Private Balcony</th>\n",
       "      <th>1.5 Marble Baths</th>\n",
       "      <th>24hr Doorman</th>\n",
       "      <th>** WICKED W50s STEAL! * MASSIVE 1BR HOME * HI END RENOVATIONS * ELEV/LNDRY BLDG * REAL PIX * STEPS TO THE PARK! **</th>\n",
       "      <th>Available 06/04/16     Firepalce</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>49347</th>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2016-04-05 03:58:33</td>\n",
       "      <td>[Elevator, Dishwasher, Hardwood Floors]</td>\n",
       "      <td>40.8433</td>\n",
       "      <td>6824800.0</td>\n",
       "      <td>-73.9396</td>\n",
       "      <td>2800.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49348</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2016-04-02 02:25:31</td>\n",
       "      <td>[Common Outdoor Space, Cats Allowed, Dogs Allo...</td>\n",
       "      <td>40.8198</td>\n",
       "      <td>6813268.0</td>\n",
       "      <td>-73.9578</td>\n",
       "      <td>2395.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49349</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016-04-26 05:42:03</td>\n",
       "      <td>[Dining Room, Elevator, Pre-War, Laundry in Bu...</td>\n",
       "      <td>40.5765</td>\n",
       "      <td>6927093.0</td>\n",
       "      <td>-73.9554</td>\n",
       "      <td>1850.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49350</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2016-04-19 02:47:33</td>\n",
       "      <td>[Pre-War, Laundry in Unit, Dishwasher, No Fee,...</td>\n",
       "      <td>40.7448</td>\n",
       "      <td>6892816.0</td>\n",
       "      <td>-74.0017</td>\n",
       "      <td>4195.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49351</th>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2016-04-20 05:34:00</td>\n",
       "      <td>[Dining Room, Elevator, Laundry in Building, D...</td>\n",
       "      <td>40.7594</td>\n",
       "      <td>6901023.0</td>\n",
       "      <td>-73.9712</td>\n",
       "      <td>4280.0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 1561 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       bathrooms  bedrooms              created  \\\n",
       "49347        1.0       3.0  2016-04-05 03:58:33   \n",
       "49348        1.0       2.0  2016-04-02 02:25:31   \n",
       "49349        1.0       1.0  2016-04-26 05:42:03   \n",
       "49350        1.0       2.0  2016-04-19 02:47:33   \n",
       "49351        1.0       3.0  2016-04-20 05:34:00   \n",
       "\n",
       "                                                features  latitude  \\\n",
       "49347            [Elevator, Dishwasher, Hardwood Floors]   40.8433   \n",
       "49348  [Common Outdoor Space, Cats Allowed, Dogs Allo...   40.8198   \n",
       "49349  [Dining Room, Elevator, Pre-War, Laundry in Bu...   40.5765   \n",
       "49350  [Pre-War, Laundry in Unit, Dishwasher, No Fee,...   40.7448   \n",
       "49351  [Dining Room, Elevator, Laundry in Building, D...   40.7594   \n",
       "\n",
       "       listing_id  longitude   price  interest_level  Dining Room  \\\n",
       "49347   6824800.0   -73.9396  2800.0               0            0   \n",
       "49348   6813268.0   -73.9578  2395.0               1            0   \n",
       "49349   6927093.0   -73.9554  1850.0               1            1   \n",
       "49350   6892816.0   -74.0017  4195.0               1            0   \n",
       "49351   6901023.0   -73.9712  4280.0               2            1   \n",
       "\n",
       "                     ...                 Chelsea  \\\n",
       "49347                ...                       0   \n",
       "49348                ...                       0   \n",
       "49349                ...                       0   \n",
       "49350                ...                       0   \n",
       "49351                ...                       0   \n",
       "\n",
       "       ** SPRAWLING SUNDRENCHED 2BR HOME * NO FEE! * WALLS OF WINDOWS * CHEF INSPIRED EAT-IN KITCHEN * 2 BLKS TO TRAIN **  \\\n",
       "49347                                                  0                                                                    \n",
       "49348                                                  0                                                                    \n",
       "49349                                                  0                                                                    \n",
       "49350                                                  0                                                                    \n",
       "49351                                                  0                                                                    \n",
       "\n",
       "       Lot's of Light  2 Full Bathrooms  attached parking garage  \\\n",
       "49347               0                 0                        0   \n",
       "49348               0                 0                        0   \n",
       "49349               0                 0                        0   \n",
       "49350               0                 0                        0   \n",
       "49351               0                 0                        0   \n",
       "\n",
       "       Private Balcony  1.5 Marble Baths  24hr Doorman  \\\n",
       "49347                0                 0             0   \n",
       "49348                0                 0             0   \n",
       "49349                0                 0             0   \n",
       "49350                0                 0             0   \n",
       "49351                0                 0             0   \n",
       "\n",
       "       ** WICKED W50s STEAL! * MASSIVE 1BR HOME * HI END RENOVATIONS * ELEV/LNDRY BLDG * REAL PIX * STEPS TO THE PARK! **  \\\n",
       "49347                                                  0                                                                    \n",
       "49348                                                  0                                                                    \n",
       "49349                                                  0                                                                    \n",
       "49350                                                  0                                                                    \n",
       "49351                                                  0                                                                    \n",
       "\n",
       "       Available 06/04/16     Firepalce  \n",
       "49347                                 0  \n",
       "49348                                 0  \n",
       "49349                                 0  \n",
       "49350                                 0  \n",
       "49351                                 0  \n",
       "\n",
       "[5 rows x 1561 columns]"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train2.tail()\n",
    "#终于处理完数据了 耗费劲"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "#接下来 数值类型数据 \n",
    "Outcome = train2['interest_level']\n",
    "num = ['bathrooms', 'bedrooms', 'latitude', 'price', 'longitude']\n",
    "x_train_num = train2[num]\n",
    "x_train_cat = train2.drop(num, axis = 1)\n",
    "\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "mn_x = MinMaxScaler()\n",
    "temp = mn_x.fit_transform(x_train_num)\n",
    "x_train_num = pd.DataFrame(data=temp, columns=num, index =train.index)\n",
    "x_train_num.head()\n",
    "\n",
    "FE_train = pd.concat([ x_train_num,  x_train_cat, Outcome], axis = 1)\n",
    "FE_train.to_csv('FE_RentListingInquries2.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
